Tasks

Objective

We want to replicate the simulations presented in Hozo et al.’s paper.

Theoretically, what did they do?

Estimators

For the sample mean

  • Equation (5)
  • Median

For the sample standard deviation

  • Equation (16)
  • range\(/4\)
  • range\(/6\)

Generate data

I’ll use some functions from my helper functions package dontpanic.

hozo_sim <- tibble(dist = list( # I love this function.
  list(rdist = "norm",
       rpars = list(mean = 50, sd = 17)),
  list(rdist = "lnorm",
       rpars = list(meanlog = 4, meansd = 0.3)),
  list(rdist = "beta",
       rpars = list(9, 4)),
  list(rdist = "exp",
       rpars = 10),
  list(rdist = "weibull",
       rpars = c(2, 35))
)) %>% 
  mutate( # Calculate true mean and sd.
    true_mean = map2_dbl(
      map_chr(dist, "rdist"),
      map(dist, "rpars"),
      .f = dontpanic::get_mean
    ),
    true_sd = pmap_dbl(
      list(
      map_chr(dist, "rdist"),
      map(dist, "rpars"),
      true_mean
      ),
      .f = dontpanic::get_sd
    ),
    rdist = map_chr(dist, "rdist"),
    rpars = map(dist, "rpars")
  ) %>% 
  select(-dist) %>% 
  mutate(
    sim_pars = pmap(list(rdist, rpars, true_mean, true_sd), list)
  ) %>% 
  select(sim_pars)

Get samples

hozo_sim <- cross(
  list(sim_pars = hozo_sim$sim_pars,
       n = seq(8, 100, by = 2),
       sim_index = seq(1, 100))
)

# Convert to tibble.
hozo_sim <- tibble(
  rdist = map_chr(hozo_sim, c(1,1)),
  rpars = map(hozo_sim, c(1,2)),
  true_mean = map_dbl(hozo_sim, c(1,3)),
  true_sd = map_dbl(hozo_sim, c(1, 4)),
  n = map_dbl(hozo_sim, "n")
)  %>% # Generate samples from parameters.
  mutate(sample = pmap(list(n = n, dist = rdist, par = rpars), 
                       dontpanic::get_sample)) %>% 
  mutate( # Calculate summary statistics.
    min = map_dbl(sample, min),
    first_q = map_dbl(sample, quantile, p = 0.25),
    median = map_dbl(sample, median),
    third_q = map_dbl(sample, quantile, p = 0.75),
    max = map_dbl(sample, max)
  )


  # Not sure I need parameters as variables.
  # mutate(
  #   rpar1 = map_dbl(rpars, 1),
  #   rpar2 = map_dbl(rpars, 2, .default = NA) 
  # ) 

# Take a look.
hozo_sim %>% listviewer::jsonedit()

Apply estimators

hozo_sim <- hozo_sim %>% 
  mutate(
    mean_hozo_5 = pmap_dbl(list(
      min, 
      median, 
      max
    ),
    hozo_5),
    sd_hozo_16 = pmap_dbl(list(
      min, 
      median, 
      max
    ),
    hozo_16),
    ran_4 = (max - min) / 4, 
    ran_6 = (max - min) / 6
  ) 

Bundle estimators into lists for long form

# Hmmm not sure this is necessary, after all. Can split them into 
# two simulations now.
# 
# hozo_sim <- hozo_sim %>% # Prepare to gather.
#   mutate(
#     mean_ests = map2(
#       median,
#       mean_hozo_5,
#       .f = function(x, y) {
#         tibble(median = x,
#                hozo_5 = y)
#       }
#     ),
#     sd_ests = pmap(
#       list(ran_4,
#            ran_6,
#            sd_hozo_16),
#       .f = function(x, y, z) {
#         tibble(ran_4 = x,
#                ran_6 = y,
#                hozo_16 = z)
#       }
#     )
#   ) 

Long form

hozo_means <- hozo_sim %>% 
  select(n, rdist, rpars, true_mean, true_sd, median, mean_hozo_5) %>% 
  gather(key = "estimator",
         value = "estimate",
         median, mean_hozo_5) %>% 
  mutate(rel_err = (estimate - true_mean) / true_mean)
  
hozo_sds <- hozo_sim %>% 
  select(n, rdist, rpars, true_mean, true_sd, ran_4, ran_6, sd_hozo_16) %>% 
  gather(key = "estimator",
         value = "estimate",
         ran_4, ran_6, sd_hozo_16) %>% 
  mutate(rel_err = (estimate - true_sd) / true_sd)

Summarise

hozo_mean_results <- hozo_means %>%
  group_by(n, rdist, estimator) %>%
  summarise(rel_err = mean(rel_err))

hozo_sd_results <- hozo_sds %>%
  group_by(n, rdist, estimator) %>%
  summarise(rel_err = mean(rel_err))

Plots

hozo_mean_results %>% 
  group_by(estimator) %>% 
  ggplot(aes(x = n, y = rel_err)) +
  geom_line(aes(colour = estimator)) +
  geom_point(aes(colour = estimator)) +
  facet_grid(rdist ~ .,
             scales = "free_y") +
  labs(title = "Comparison of precision of estimators for sample mean",
       x = "Sample size",
       y = "Mean relative error")

hozo_sd_results %>% 
  group_by(estimator) %>% 
  ggplot(aes(x = n, y = rel_err)) +
  geom_line(aes(colour = estimator)) +
  geom_point(aes(colour = estimator)) +
  facet_grid(rdist ~ .,
             scales = "free_y") +
  labs(title = "Comparison of precision of estimators for sample standard deviation",
       x = "Sample size",
       y = "Mean relative error")